* Session setup: turn off output paging and define the project path globals.
set more off

* Machine-specific roots. Add a branch for each new user, or define the
* globals dir and pathCode before running this do-file.
if "`c(username)'"=="mohitkarnani"  {
	global dir = "/Users/mohitkarnani/Documents/Platform-Externalities/private_data"
	global pathCode = "/Users/mohitkarnani/Documents/Platform-Externalities/for_publication/code"
}

* Stop early with a clear message when the roots are undefined; otherwise
* every path below silently collapses to something like "/1_data/...".
if `"$dir"'=="" | `"$pathCode"'=="" {
	display as error "Globals dir and pathCode are not defined for user `c(username)'; set them before running this do-file."
	exit 198
}

* Derived data folders
global pathRawData="$dir/1_data"
global pathIntermediateData="$dir/3_intermediate"
global pathCleanData="$dir/4_clean"

*** Adjust crosswalks
* Builds one intermediate crosswalk per year (2010-2013) mapping that year's
* codigo_unico to DEMRE codes. The 2012 file is only trimmed to its two code
* columns; the other years are first reduced to one row per codigo_unico.
foreach yr in 2010 2011 2012 2013 {
	if `yr'==2012 {
		use "$pathRawData/crosswalk_2012.dta", clear
		keep codigo_unico_2012 codigo_demre_2012
	}
	else {
		use "$pathRawData/crosswalk_`yr'_2012.dta", clear
		drop if codigo_unico_`yr'==""
		* Descending order on the 2012 code decides which row comes first
		gsort codigo_unico_`yr' -codigo_demre_2012
		bys codigo_unico_`yr': gen auy=_n
		keep if auy==1 //prioritizes 2012 demre code
		drop au*
	}
	destring codigo_demre*, replace
	if `yr'==2013 {
		* Only the 2013 crosswalk is restricted to its code columns
		keep codigo_*
	}
	compress
	save "$pathIntermediateData/crosswalk_`yr'_2012.dta", replace
}

*** Import data
* Mineduc enrollment files: writes one cleaned file per year (variables
* prefixed e_) and, for 2010-2012, a one-row-per-student first-year file
* (variables additionally prefixed f) merged with the 2012 crosswalk.
forvalues y=2007/2017 {
	import delimited "$pathRawData/Matricula Mineduc/Matricula Ed_Superior `y'/Matr°cula_Ed_Superior_`y'.csv", delimiter(";") clear
	keep cat_periodo codigo_unico mrun gen_alu anio_mat_pri_anio anio_ing_carrera ///
		tipo_inst_3 nomb_inst dur_total_carr valor_matricula valor_arancel ///
		codigo_demre nomb_carrera area_carrera_generica

	* Institution-group indicators (g8: listed institutions; g25: CRUCH types)
	gen g8=inlist(nomb_inst, "UNIVERSIDAD ADOLFO IBAÑEZ", ///
		"UNIVERSIDAD ALBERTO HURTADO", ///
		"UNIVERSIDAD ANDRES BELLO", ///
		"UNIVERSIDAD DE LOS ANDES", ///
		"UNIVERSIDAD DEL DESARROLLO", ///
		"UNIVERSIDAD DIEGO PORTALES", ///
		"UNIVERSIDAD FINIS TERRAE", ///
		"UNIVERSIDAD MAYOR")
	gen g25=inlist(tipo_inst_3, "Universidades Estatales CRUCH", ///
		"Universidades Privadas CRUCH")

	destring *, replace
	replace valor_matricula=0 if cat_periodo==2010 & nomb_inst=="UNIVERSIDAD DE LA SERENA"
	keep if mrun!=. & (g8 | g25)

	* Enrollment year: the earlier of the two entry-year fields
	gen enroll_year=cond(anio_mat_pri_anio > anio_ing_carrera, anio_ing_carrera, anio_mat_pri_anio)

	* Numeric university code from the institution name; the i-th code in
	* univ_codes belongs to the i-th name in the list below.
	gen univ=.
	local univ_codes 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 29 30 32 33 34 35 36 37 38 39 40 41 42 43 44 45
	local pos = 0
	foreach inst_name in ///
		"UNIVERSIDAD DE CHILE" ///
		"PONTIFICIA UNIVERSIDAD CATOLICA DE CHILE" ///
		"UNIVERSIDAD DE CONCEPCION" ///
		"PONTIFICIA UNIVERSIDAD CATOLICA DE VALPARAISO" ///
		"UNIVERSIDAD TECNICA FEDERICO SANTA MARIA" ///
		"UNIVERSIDAD DE SANTIAGO DE CHILE" ///
		"UNIVERSIDAD AUSTRAL DE CHILE" ///
		"UNIVERSIDAD CATOLICA DEL NORTE" ///
		"UNIVERSIDAD DE VALPARAISO" ///
		"UNIVERSIDAD METROPOLITANA DE CIENCIAS DE LA EDUCACION" ///
		"UNIVERSIDAD TECNOLOGICA METROPOLITANA" ///
		"UNIVERSIDAD DE TARAPACA" ///
		"UNIVERSIDAD ARTURO PRAT" ///
		"UNIVERSIDAD DE ANTOFAGASTA" ///
		"UNIVERSIDAD DE LA SERENA" ///
		"UNIVERSIDAD DE PLAYA ANCHA DE CIENCIAS DE LA EDUCACION" ///
		"UNIVERSIDAD DE ATACAMA" ///
		"UNIVERSIDAD DEL BIO-BIO" ///
		"UNIVERSIDAD DE LA FRONTERA" ///
		"UNIVERSIDAD DE LOS LAGOS" ///
		"UNIVERSIDAD DE MAGALLANES" ///
		"UNIVERSIDAD DE TALCA" ///
		"UNIVERSIDAD CATOLICA DEL MAULE" ///
		"UNIVERSIDAD CATOLICA DE LA SANTISIMA CONCEPCION" ///
		"UNIVERSIDAD CATOLICA DE TEMUCO" ///
		"UNIVERSIDAD DIEGO PORTALES" ///
		"UNIVERSIDAD MAYOR" ///
		"UNIVERSIDAD FINIS TERRAE" ///
		"UNIVERSIDAD ANDRES BELLO" ///
		"UNIVERSIDAD ADOLFO IBAÑEZ" ///
		"UNIVERSIDAD DE LOS ANDES" ///
		"UNIVERSIDAD DEL DESARROLLO" ///
		"UNIVERSIDAD ALBERTO HURTADO" {
		local ++pos
		local code : word `pos' of `univ_codes'
		replace univ=`code' if nomb_inst=="`inst_name'"
	}

	drop anio_* tipo_inst_3 nomb_inst
	rename cat_periodo year
	rename gen_alu male
	replace male=0 if male==2
	compress //allow duplicates because we will merge by mrun and codigo_demre
	rename * e_*
	save "$pathIntermediateData/enrollment_mrun_`y'", replace

	* First-year enrollment file, only for 2010-2012
	if inrange(`y',2010,2012) {
		keep if e_year == e_enroll_year
		duplicates tag e_mrun, gen(multi_enroll)
		* One row per student; to future self: count if codigo_demre !=. on the kept rows
		bys e_mrun: keep if _n==1
		rename e_codigo_unico codigo_unico_`y'
		merge m:1 codigo_unico_`y' using "$pathIntermediateData/crosswalk_`y'_2012", nogen keep(1 3)
		compress
		rename * f*
		save "$pathIntermediateData/fenrollment_mrun_`y'", replace
	}
}

* Per year, the set of distinct student ids (mrun) that appear in either the
* Mineduc enrollment file or the platform enrollment file.
forvalues y=2007/2012 {
	local past_file "$pathIntermediateData/past_enrollment_mrun_`y'"

	* Ids from the Mineduc registry
	import delimited "$pathRawData/Matricula Mineduc/Matricula Ed_Superior `y'/Matr°cula_Ed_Superior_`y'.csv", delimiter(";") clear
	keep mrun
	destring mrun, replace
	save "`past_file'", replace

	* Ids from the platform enrollment file, stacked on top of the above
	import delimited "$pathRawData/Matricula Platform/D_MATRICULA_PSU_`y'_PRIV_MRUN.csv", delimiter(";") clear
	keep mrun
	destring mrun, replace
	append using "`past_file'"

	duplicates drop
	drop if missing(mrun)
	compress
	save "`past_file'", replace
}

* Application files: keep preferences with estado_preferencia 24-26, derive
* the university code and group flags, prefix every variable with a_.
forvalues y=2007/2017 {
	import delimited "$pathRawData/Applications/C_POSTULACIONES_SELECCION_PSU_`y'_PRIV_MRUN.csv", delimiter(";") clear
	destring *, replace //
	keep if mrun!=. & inrange(estado_preferencia,24,26)
	gen used_previous=(pond_año_acad==2)
	keep año_proceso mrun preferencia codigo_carrera sigla_universidad ///
		estado_preferencia puntaje lugar used_previous

	* univ = leading two digits of codigo_carrera
	gen univ=floor(codigo_carrera/(10^(floor(log10(codigo_carrera))-1)))
	gen g8=(univ>37)
	gen g25=!g8

	rename (año_proceso preferencia) (year pref)
	compress
	rename * a_*
	save "$pathIntermediateData/apps_mrun_`y'", replace
}

* Platform enrollment files: derive the university code and group flags,
* prefix every variable with pe_.
forvalues y=2007/2017 {
	import delimited "$pathRawData/Matricula Platform/D_MATRICULA_PSU_`y'_PRIV_MRUN.csv", delimiter(";") clear
	destring *, replace //
	drop if mrun==.
	keep año_proceso mrun preferencia codigo_carrera sigla_universidad ///
		puntaje_ponderado lugar

	* univ = leading two digits of codigo_carrera
	gen univ=floor(codigo_carrera/(10^(floor(log10(codigo_carrera))-1)))
	gen g8=(univ>37)
	gen g25=!g8

	rename (año_proceso preferencia) (year pref)
	compress
	rename * pe_*
	save "$pathIntermediateData/penrollment_mrun_`y'", replace
}

* Test-taker files: cohort indicators relative to the process year, school
* type flags and scores; every variable is prefixed with s_.
forvalues y=2007/2017 {
	import delimited "$pathRawData/Scores/A_INSCRITOS_PUNTAJES_PSU_`y'_PRIV_MRUN.csv", delimiter(";") clear

	* current_cohort: graduated the year before the process;
	* old_cohortK: graduated more than K years before the process
	gen current_cohort=(año_egreso==`y'-1)
	forvalues lag=1/5 {
		gen old_cohort`lag'=(año_egreso<`y'-`lag')
	}

	keep mrun año_proceso cod_sexo fecha_nacimiento rbd rama_educacional ///
		grupo_dependencia codigo_region año_egreso ptje_nem lyc_actual mate_actual ///
		hycs_actual ciencias_actual bea current_cohort old_cohort*

	* tp: rama_educacional starts with "T"; private: grupo_dependencia equals 4
	gen tp=(strpos(rama_educacional,"T")==1)
	gen private=(grupo_dependencia==4)
	drop rama_educacional grupo_dependencia

	rename (año_proceso cod_sexo ptje_nem lyc_actual mate_actual hycs_actual ciencias_actual) ///
		(year male gpa lang math hist sci)
	replace male=0 if male==2
	destring *, replace
	drop if mrun==.
	compress
	rename * s_*
	save "$pathIntermediateData/students_mrun_`y'", replace
}

* Financial-aid assignments: collapse to one row per student whose aid_type
* is the "_"-joined list of that student's benefits; variables prefixed f_.
forvalues y=2008/2012 {
	import delimited "$pathRawData/Financial Aid/Asignacion `y'.csv", delimiter(";") clear
	drop anio_beneficio
	rename beneficio_becaofscu aid_type

	* Accumulate the benefits in sorted order; the last row of each student
	* then holds the full concatenation.
	bysort mrun (aid_type): replace aid_type=aid_type[_n-1]+"_"+aid_type if _n>1
	by mrun: keep if _n==_N

	destring *, replace
	drop if mrun==.
	compress
	rename * f_*
	save "$pathIntermediateData/aid_mrun_`y'", replace
}

* Graduation (Titulados) files: keep graduates of the G8 / CRUCH institutions
* with a valid student id; every variable is prefixed with g_.
forvalues y=2007/2019 {
	import delimited "$pathRawData/Titulados/Titulados_Educacion_Superior_`y'.csv", delimiter(";") clear
	keep cat_periodo codigo_unico mrun gen_alu fec_nac_alu año_ing_pri_año ///
		año_ing_carr nomb_inst dur_total_carr tipo_inst_3

	* year and codigo_unico: keep only the text after the last underscore
	gen year=substr(cat_periodo,strrpos(cat_periodo,"_")+1, ///
		strlen(cat_periodo)-strrpos(cat_periodo,"_"))
	replace codigo_unico=substr(codigo_unico,strrpos(codigo_unico,"_")+1, ///
		strlen(codigo_unico)-strrpos(codigo_unico,"_"))

	* Institution-group indicators
	gen g8=inlist(nomb_inst, "UNIVERSIDAD ADOLFO IBAÑEZ", ///
		"UNIVERSIDAD ALBERTO HURTADO", ///
		"UNIVERSIDAD ANDRES BELLO", ///
		"UNIVERSIDAD DE LOS ANDES", ///
		"UNIVERSIDAD DEL DESARROLLO", ///
		"UNIVERSIDAD DIEGO PORTALES", ///
		"UNIVERSIDAD FINIS TERRAE", ///
		"UNIVERSIDAD MAYOR")
	gen g25=inlist(tipo_inst_3, "UNIVERSIDADES ESTATALES CRUCH", ///
		"UNIVERSIDADES PRIVADAS CRUCH")

	destring *, replace

	* Parentheses are required: & binds tighter than |, so without them G8
	* rows with a missing mrun would be kept. Same filter as the enrollment import.
	keep if (g8 | g25) & mrun!=.

	replace gen_alu=0 if gen_alu==2
	rename (gen_alu año_ing_pri_año) (male enroll_year)
	* Enrollment year: the earlier of the two entry-year fields
	replace enroll_year=año_ing_carr if enroll_year>año_ing_carr
	drop cat_periodo año_ing_carr
	compress
	rename * g_*
	save "$pathIntermediateData/grads_mrun_`y'", replace
}

* Stack the yearly graduation files into a single file.
local grad_files
forvalues y=2007/2019 {
	local grad_files `"`grad_files' "$pathIntermediateData/grads_mrun_`y'""'
}
clear
append using `grad_files'
compress
save "$pathIntermediateData/grads_mrun", replace

*** Merge data
* For each process year 2010-2012, chain-merges the student file with
* applications, aid, first-year enrollment, platform enrollment, next-year
* enrollment, past enrollment and graduation records. The student id is
* renamed before each merge to match the key name of the file being merged.
* Indicators built from _merge keep its coding unless recoded below.
forvalues y=2010/2012 {
	*** Students <-> Apps
	use "$pathIntermediateData/students_mrun_`y'", clear
	rename s_mrun a_mrun
	merge 1:m a_mrun using "$pathIntermediateData/apps_mrun_`y'"
	* no_app: 0 = student with applications, 1 = student only, 2 = application only
	replace _merge=0 if _merge==3
	rename _merge no_app
	* admit flags the preference rows with estado_preferencia 24
	gen admit = a_estado_preferencia==24
	* Sort admitted rows first so the single row kept per student is an
	* admitted preference whenever one exists.
	gsort a_mrun -admit
	bys a_mrun: gen aux=_n
	keep if aux==1 //warning: keeping only one line per app
	drop aux

	*** Students <-> Apps <-> Aid
	rename a_mrun f_mrun
	merge 1:1 f_mrun using "$pathIntermediateData/aid_mrun_`y'", keep(1 3)
	* no_aid: 0 = found in the aid file, 1 = not found
	replace _merge=0 if _merge==3
	rename _merge no_aid //received aid on app-year? (doesn't consider future aid)
	* cae_eligible: not from a private school and math/language average of at
	* least 475; left missing when either score is missing
	gen cae_eligible=!s_private & (s_math+s_lang)/2>=475 if !mi(s_math) & !mi(s_lang)
	
	*** Students <-> Apps <-> Aid <-> Freshmen Enrollment 
	rename f_mrun fe_mrun
	merge 1:1 fe_mrun using "$pathIntermediateData/fenrollment_mrun_`y'"
	* no_enroll: 0 = found in the first-year enrollment file, 1 = not found;
	* enrollment records without a matching row in memory are dropped
	replace _merge=0 if _merge==3
	drop if _merge==2
	rename _merge no_enroll
	rename fe_mrun pe_mrun
	merge 1:1 pe_mrun using "$pathIntermediateData/penrollment_mrun_`y'", keep(1 3)
	* enrolled_platform: the student appears in the platform enrollment file
	gen enrolled_platform=(_merge==3)
	drop _merge	

	*** Students <-> Apps <-> Aid <-> Freshmen Enrollment <-> t+1 Enrollment
	rename pe_mrun e_mrun
	local t=`y'+1
	merge 1:m e_mrun using "$pathIntermediateData/enrollment_mrun_`t'"
	* dropout: 0 = found in next year's enrollment file, 1 = not found
	replace _merge=0 if _merge==3
	drop if _merge==2
	rename _merge dropout
	* same_program compares university codes (univ) between the first-year
	* record and the next-year record; where a platform record exists, its
	* university code is used for the comparison instead.
	gen same_program=(fe_univ==e_univ) if !mi(fe_univ) & !mi(e_univ)
	replace same_program=(pe_univ==e_univ) if !mi(pe_univ)
	
	*** Students <-> Apps <-> Aid <-> Freshmen Enrollment <-> t+1 Enrollment <-> t-x Enrollment
	* past_enrollmentK: 1 if the id appears in the past-enrollment file of
	* year y-K, 0 otherwise
	rename e_mrun mrun
	local t=`y'-1
	merge m:1 mrun using "$pathIntermediateData/past_enrollment_mrun_`t'", keep(1 3)
	replace _merge=0 if _merge==1
	replace _merge=1 if _merge==3
	rename _merge past_enrollment1
	local t=`y'-2
	merge m:1 mrun using "$pathIntermediateData/past_enrollment_mrun_`t'", keep(1 3)
	replace _merge=0 if _merge==1
	replace _merge=1 if _merge==3
	rename _merge past_enrollment2
	local t=`y'-3
	merge m:1 mrun using "$pathIntermediateData/past_enrollment_mrun_`t'", keep(1 3)
	replace _merge=0 if _merge==1
	replace _merge=1 if _merge==3
	rename _merge past_enrollment3

	*** Students <-> Apps <-> Aid <-> Freshmen Enrollment <-> t+1 Enrollment <-> t-x Enrollment <-> Graduation    
	rename mrun g_mrun
	* The 1:m merge on next-year enrollment can leave several rows per
	* student; keep one before attaching graduation records.
	bys g_mrun: keep if _n==1 //multi-enrollment/graduation
	merge 1:m g_mrun using "$pathIntermediateData/grads_mrun"
	* no_grad: 0 = found in the graduation file, 1 = not found
	replace _merge=0 if _merge==3
	drop if _merge==2
	rename _merge no_grad
	tab no_grad if !no_enroll
	compress
	save "$pathIntermediateData/apps_aid_fenrollment_enrollment_grads_mrun_`y'", replace
}

*** Cleanup
* Stack the three yearly merged files, remove all value-label definitions,
* save the combined file and reload it for the sections that follow.
local merged_files
forvalues y=2010/2012 {
	local merged_files `"`merged_files' "$pathIntermediateData/apps_aid_fenrollment_enrollment_grads_mrun_`y'""'
}
clear
append using `merged_files'
compress
label drop _all
save "$pathIntermediateData/apps_aid_fenrollment_enrollment_grads_mrun", replace

use "$pathIntermediateData/apps_aid_fenrollment_enrollment_grads_mrun", clear

*** Students and Aid
* Clean student-level file: one row per student (rut) and process year, with
* scores, background, cohort flags, past enrollment and aid indicators.
preserve
	gen grad6=inrange(g_year-fe_year,0,7) if !no_enroll
	keep g_mrun s_* f_aid_type no_aid cae_eligible past_enrollment* grad6

	* Rename to the variable names used in the clean files
	rename (g_mrun s_year s_codigo_region s_año_egreso) (rut proceso region egreso)
	rename (s_gpa s_lang s_math s_hist s_sci) (nem leng mate hria cien)
	rename (s_private s_male) (private_hs male)
	rename (s_current_cohort s_old_cohort1 s_old_cohort2 s_old_cohort3 s_old_cohort4 s_old_cohort5) ///
		(current_cohort old_cohort1 old_cohort2 old_cohort3 old_cohort4 old_cohort5)

	* Final variable list: defines which columns stay, their order and the sort key
	local student_vars rut proceso region egreso nem leng mate hria cien private_hs male s_tp s_bea f_aid_type no_aid cae_eligible current_cohort old_cohort1 old_cohort2 old_cohort3 old_cohort4 old_cohort5 past_enrollment1 past_enrollment2 past_enrollment3 grad6
	keep `student_vars'
	order `student_vars'
	sort `student_vars'
	by rut proceso: keep if _n==1

	* Scholarship amount implied by the aid codes found in f_aid_type
	gen aid_amount=0 //BET isn't relevant for our case (only CFT/IP)
	replace aid_amount=1150000 if strpos(f_aid_type,"BEA")>0 | strpos(f_aid_type,"BJGM")>0 | strpos(f_aid_type,"BPSU")>0
	replace aid_amount=500000 if aid_amount==0 & (strpos(f_aid_type,"BNM")>0 | strpos(f_aid_type,"BHPE")>0)
	gen reference_tuition=(strpos(f_aid_type,"BBIC")>0)

	* One indicator per aid code (bicentenario is the same as reference_tuition);
	* the i-th name in aid_flags goes with the i-th code in aid_codes
	local aid_flags bea jgm nacional nuevomilenio hijoprofe bicentenario vocacionprofe fondosolidario edutecnica
	local aid_codes BEA BJGM BPSU BNM BHPE BBIC BPED FSCU BET
	forvalues i=1/9 {
		local flag : word `i' of `aid_flags'
		local code : word `i' of `aid_codes'
		gen `flag'=strpos(f_aid_type,"`code'")>0
	}

	compress
	save "$pathCleanData/students_mrun", replace
restore
	
*** Apps and Options
* Application-level file (one row per student and listed preference) with the
* first-year enrollment fees attached, stacked over 2010-2012.
preserve
	local app_files
	forvalues y=2010/2012 {
		*** Students <-> Apps
		use "$pathIntermediateData/students_mrun_`y'", clear
		rename s_mrun a_mrun
		merge 1:m a_mrun using "$pathIntermediateData/apps_mrun_`y'"
		replace _merge=0 if _merge==3
		rename _merge no_app

		* Attach the first-year enrollment record; unmatched enrollment rows are not kept
		rename a_mrun fe_mrun
		merge m:1 fe_mrun using "$pathIntermediateData/fenrollment_mrun_`y'", ///
			keepus(fe_mrun fcodigo_demre_`y' fe_val*) keep(1 3)
		replace _merge=0 if _merge==3
		rename _merge no_enroll

		* Enrolled in exactly the program listed on this application row
		gen enrolled_platform=(a_codigo_carrera!=. & a_codigo_carrera==fcodigo_demre_`y')

		compress
		save "$pathIntermediateData/apps_students_`y'", replace
		local app_files `"`app_files' "$pathIntermediateData/apps_students_`y'""'
	}
	clear
	append using `app_files'
	rename fe_mrun g_mrun
	compress
	save "$pathIntermediateData/appsint_mrun", replace
restore

* Builds the clean application file (apps_mrun) and the program-level file
* (platform_options) from the stacked application data.
preserve
	use "$pathIntermediateData/appsint_mrun", clear
	* Rows without an application year are students who did not apply
	drop if a_year==.
	keep g_mrun a_* fe_val*  s_gpa s_lang s_math s_hist s_sci enrolled_platform s_private
	rename (a_codigo_carrera a_year) (cod proceso)
	* Program-by-year statistics. Each *_a helper is computed on the relevant
	* subset of rows only; taking its mean spreads the value to every row of
	* the program-year.
	* last_admit / punt_lastAdmit: highest rank and lowest score among rows with status 24
	bys cod proceso: egen last_admit_a=max(a_lugar) if a_estado_preferencia==24
	bys cod proceso: egen last_admit=mean(last_admit_a)
	bys cod proceso: egen punt_lastAdmit_a=min(a_puntaje) if a_estado_preferencia==24
	bys cod proceso: egen punt_lastAdmit=mean(punt_lastAdmit_a)
	* Same statistics among status-25 rows that enrolled in the program
	bys cod proceso: egen rank_last_waitAdmit_a=max(a_lugar) if a_estado_preferencia==25 & enrolled_platform
	bys cod proceso: egen rank_last_waitAdmit=mean(rank_last_waitAdmit_a)
	bys cod proceso: egen punt_last_waitAdmit_a=min(a_puntaje) if a_estado_preferencia==25 & enrolled_platform
	bys cod proceso: egen punt_last_waitAdmit=mean(punt_last_waitAdmit_a)
	* maxwaitrank: highest rank among all status-25 rows
	bys cod proceso: egen maxwaitrank_a=max(a_lugar) if a_estado_preferencia==25
	bys cod proceso: egen maxwaitrank=mean(maxwaitrank_a)
	* With no enrolled status-25 row, fall back to the status-24 score cutoff
	replace punt_last_waitAdmit=punt_lastAdmit if punt_last_waitAdmit==.
	* Program rules and major names; only matched rows are kept
	merge m:1 proceso cod using "$pathRawData/rules", nogen keep(3) ///
		keepus(proceso cod _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien vacantes1ersemestre vacantes2dosemestre max_hria_cien FLcode_app Area Univ Uregion sede Cquant Cqual Cprof duration psu_cut index_cut)
	merge m:1 FLcode_app using "$pathRawData/names_Majors", nogen keep(3)
	* Predicted index: weighted sum of the scores; when max_hria_cien is set,
	* the smaller of the history/science scores is subtracted with the history weight
	gen pred_index = _b_nem * s_gpa + _b_psu_leng * s_lang + _b_psu_mate * s_math ///
		+ _b_psu_hria * s_hist + _b_psu_cien * s_sci - max_hria_cien * min(s_hist,s_sci) * _b_psu_hria
	rename (g_mrun a_pref a_puntaje a_estado_preferencia a_lugar a_used_previous fe_valor_matricula fe_valor_arancel) (rut pref punt sit lugar used_previous valor_matricula valor_arancel)
	egen FL_UNISEDE = group(FLcode_app a_univ sede)
	
*** FLcode fixes:
* External do-file, run on the data currently in memory (contents not shown here)
do "$pathCode/fixFL.do"

	* Total posted seats over both semesters
	gen vacantes= vacantes1ersemestre+vacantes2dosemestre
	keep rut proceso pref cod punt pred_index sit lugar used_previous FL_UNISEDE last_admit punt_lastAdmit punt_last_waitAdmit rank_last_waitAdmit maxwaitrank Area Univ Uregion sede Cquant Cqual Cprof duration psu_cut index_cut s_private FLcode_app MajorName _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien vacantes max_hria_cien valor_matricula valor_arancel
	order rut proceso pref cod punt pred_index sit lugar used_previous FL_UNISEDE last_admit punt_lastAdmit punt_last_waitAdmit rank_last_waitAdmit maxwaitrank Area Univ Uregion sede Cquant Cqual Cprof duration psu_cut index_cut s_private FLcode_app MajorName _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien vacantes max_hria_cien valor_matricula valor_arancel
	* NOTE(review): the sort list omits valor_matricula and valor_arancel, so
	* ties on every listed variable are broken arbitrarily - confirm this is acceptable
	sort rut proceso pref cod punt pred_index sit lugar used_previous FL_UNISEDE last_admit punt_lastAdmit punt_last_waitAdmit rank_last_waitAdmit maxwaitrank Area Univ Uregion sede Cquant Cqual Cprof duration psu_cut index_cut s_private FLcode_app MajorName _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien vacantes max_hria_cien
	* One row per student, year and preference
	bys rut proceso pref: gen aux=_n
	keep if aux==1
	drop aux
	duplicates report rut proceso pref
	merge m:1 proceso cod using "$pathRawData/waitlist_cutoff_2010_2012_new_data.dta", nogen keep(1 3)
	* NOTE(review): posted_slots from the waitlist file is overwritten for every
	* row with vacantes - confirm this is intended
	replace posted_slots=vacantes
	compress
	save "$pathCleanData/apps_mrun", replace
	
	* Program-level file: averages over status-24 rows, one row per program-year
	keep if sit==24
	bys cod proceso: egen Selectivity=mean(punt)
	bys cod proceso: egen PrivateShare=mean(s_private)
	duplicates drop cod proceso, force
	rename (Uregion sede) (Geo_RegionCode Sede)
	keep proceso cod Area FLcode_app MajorName Univ Geo_RegionCode Sede Cquant Cqual Cprof duration FL_UNISEDE _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien index_cut psu_cut max_hria_cien valor_matricula valor_arancel last_admit punt_lastAdmit Selectivity PrivateShare punt_last_waitAdmit rank_last_waitAdmit maxwaitrank not_filled posted_slots slots
	order proceso cod Area FLcode_app MajorName Univ Geo_RegionCode Sede Cquant Cqual Cprof duration FL_UNISEDE _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien index_cut psu_cut max_hria_cien valor_matricula valor_arancel last_admit punt_lastAdmit Selectivity PrivateShare punt_last_waitAdmit rank_last_waitAdmit maxwaitrank not_filled posted_slots slots
	sort proceso cod Area FLcode_app MajorName Univ Geo_RegionCode Sede Cquant Cqual Cprof duration FL_UNISEDE _b_nem _b_psu_leng _b_psu_mate _b_psu_hria _b_psu_cien index_cut psu_cut max_hria_cien valor_matricula valor_arancel last_admit punt_lastAdmit Selectivity PrivateShare punt_last_waitAdmit rank_last_waitAdmit maxwaitrank not_filled posted_slots slots
	compress
	save "$pathCleanData/platform_options", replace
restore


*** Enrollment and Graduation
* Clean enrollment file: one row per enrolled student (rut) and process year
* with program codes, group flags and persistence / graduation outcomes.
preserve
	drop if fe_year==.
	keep g_mrun f* e* enrolled_platform g* dropout same_program no_grad

	* Program identifiers come from the crosswalk column of the enrollment year
	gen codigo_unico=cond(fe_year==2010, fcodigo_unico_2010, cond(fe_year==2011, fcodigo_unico_2011, fcodigo_unico_2012))
	gen cod=cond(fe_year==2010, fcodigo_demre_2010, cond(fe_year==2011, fcodigo_demre_2011, fcodigo_demre_2012))

	* Graduation within the given windows of years after enrollment
	gen grad5=inrange(g_year-fe_year,0,5)
	gen grad6=inrange(g_year-fe_year,0,7)
	gen five_year_program=(fe_dur_total_carr/2<=5)

	rename (g_mrun fe_year fe_nomb_carrera fe_univ) (rut proceso matCarrera Univ)
	rename (g_year fcodigo_demre_2012 fe_g8 fe_g25) (egreso cod2012 g8 g25)

	* Final variable list: defines which columns stay, their order and the sort key
	local enroll_vars rut proceso cod codigo_unico matCarrera Univ egreso cod2012 g8 g25 enrolled_platform dropout same_program no_grad grad5 grad6 five_year_program
	keep `enroll_vars'
	order `enroll_vars'
	sort `enroll_vars'
	by rut proceso: keep if _n==1

	compress
	save "$pathCleanData/enrollgrad_mrun", replace
restore


*** Options
* Builds the option-level file: the platform options plus, for process years
* before 2012, the G8 programs observed in enrollment (flagged filledg8) that
* carry the attributes of their 2012 platform entry. Missing fees are then
* imputed step by step.
use "$pathCleanData/enrollgrad_mrun", clear
keep if g8 & proceso<2012 & !mi(cod2012)

* The merged variable is not used afterwards (it is dropped by the keep
* below). keep(1 3) stops unmatched rows of the BVP list, which have missing
* cod2012 and proceso, from being added as a spurious option.
merge m:1 codigo_unico using "$pathRawData/List_Ues_BVP", keepus(BVP_Carear) keep(1 3) nogen

keep cod2012 proceso
duplicates drop
rename (cod2012 proceso) (cod procesog8)
* Borrow the program attributes from the 2012 platform entry, then restore
* the original process year
gen proceso=2012
merge m:1 cod proceso using "$pathCleanData/platform_options", nogen keep (1 3) //drop if cod==40052
drop proceso
rename procesog8 proceso
gen filledg8=1
append using "$pathCleanData/platform_options"
drop if cod==40052 //something's wrong with this cod, we drop it
compress

* Fee imputation, applied in order to tuition (valor_arancel) and then to the
* enrollment fee (valor_matricula); each step only fills values still missing:
*   1. the program's previous process year
*   2. the average over Area x Univ x proceso
*   3. the previous process year's Area x Univ average
*   4. the average over Univ x proceso
foreach fee in valor_arancel valor_matricula {
	bysort cod (proceso): gen `fee'_1 = `fee'[_n-1]
	replace `fee' = `fee'_1  if missing(`fee')

	egen `fee'_2 = mean(`fee'), by(Area Univ proceso)
	replace `fee' = `fee'_2  if missing(`fee')

	bysort cod (proceso): gen `fee'_3 = `fee'_2[_n-1]
	replace `fee' = `fee'_3  if missing(`fee')

	egen `fee'_4 = mean(`fee'), by(Univ proceso)
	replace `fee' = `fee'_4  if missing(`fee')

	drop `fee'_*
}

* Median tuition within FLcode_app and process year
bys FLcode_app proceso: egen arancel_referencial_proxy=median(valor_arancel)

save "$pathCleanData/options_mrun", replace

*** Add option names
* Collect the distinct program name / campus / university acronym per program
* and process year from the 2010-2012 application files.
preserve
	local optname_files
	forvalues y=2010/2012 {
		import delimited "$pathRawData/Applications/C_POSTULACIONES_SELECCION_PSU_`y'_PRIV_MRUN.csv", delimiter(";") clear
		keep año_proceso codigo_carrera nombre_carrera sede_carrera sigla_universidad
		duplicates drop
		rename (año_proceso codigo_carrera) (proceso cod)
		compress
		tempfile optnames`y'
		save "`optnames`y''"
		local optname_files `"`optname_files' "`optnames`y''""'
	}
	clear
	append using `optname_files'
	compress
	tempfile optnames
	save "`optnames'"
restore

* Options flagged filledg8 take their names from the 2012 entry: switch their
* process year to 2012 for the merge and restore it afterwards.
gen procesog8=proceso if filledg8==1
replace proceso=2012 if filledg8==1
merge m:1 proceso cod using "`optnames'", nogen keep(3)
replace proceso=procesog8 if filledg8==1
drop procesog8
compress

*** Add cod2012, BVP and slots
* For each crosswalk year, map the option's code (cod) to a 2012 DEMRE code
* and, for 2011 and 2012, flag programs that appear in that year's BVP list.
foreach y in 2010 2011 2012 2013 {
	preserve
		use "$pathIntermediateData/crosswalk_`y'_2012.dta", clear
		if `y'==2010 | `y'==2013 {
			* No BVP list is merged for these years; the flag is left missing
			gen bvp_`y'=.
		}
		else {
			rename codigo_unico_`y' codigo_unico
			merge 1:1 codigo_unico using "$pathRawData/codigo_unico_carreras_bvp_`y'"
			gen bvp_`y'=(_merge==3)
			drop _merge
		}
		* NOTE(review): "bvp" appears to select bvp_`y' through variable-name
		* abbreviation - confirm that varabbrev is on wherever this runs
		keep codigo_demre_`y' codigo_demre_2012 bvp
		drop if mi(codigo_demre_`y') | mi(codigo_demre_2012)
		duplicates drop
		* When one code of year y maps to several 2012 codes, prefer a 2012
		* code with the same last two digits
		gen extrazero=(mod(codigo_demre_`y',100)==mod(codigo_demre_2012,100)) //preferred substitution
		gsort codigo_demre_`y' -extrazero codigo_demre_2012 //sorting rule:
		bys codigo_demre_`y': keep if _n==1 //preferred, else smallest 2012 code
		drop extrazero
		tempfile cw
		save "`cw'", replace
	restore
	* Look up the option's code as if it were a code of year y
	gen codigo_demre_`y' = cod
	merge m:1 codigo_demre_`y' using "`cw'", nogen keep(1 3)
	rename codigo_demre_2012 codigo_demre_2012_`y'
}
* demrecode: the 2012-equivalent code chosen according to the process year
* NOTE(review): a missing proceso also satisfies proceso>=2013 in Stata
gen demrecode=cod if proceso==2012
replace demrecode=codigo_demre_2012_2013 if proceso>=2013
replace demrecode=codigo_demre_2012_2011 if proceso==2011
replace demrecode=codigo_demre_2012_2010 if proceso<=2010

* Options flagged filledg8 keep their own code; remaining gaps fall back to
* the 2010 and then the 2011 crosswalk
replace demrecode=cod if filledg8==1
replace demrecode=codigo_demre_2012_2010 if mi(demrecode)
replace demrecode=codigo_demre_2012_2011 if mi(demrecode)

*** Demrecode fixes:	
* External do-file, run on the data currently in memory (contents not shown here)
do "$pathCode/fix.do"

* bvp is 1 when any yearly flag is 1 and missing otherwise
gen bvp=1 if bvp_2010==1 | bvp_2011==1 | bvp_2012==1 | bvp_2013==1
drop bvp_*

* Seats: total seats, over-capacity seats (both semesters) and BEA seats per
* program and admission process, merged onto the options.
preserve
	import delimited "$pathRawData/vacantes_carreras_adm2004-2020_oferta_regular_bea.csv", encoding(UTF-8) clear delimiter(";") varnames(1) bindquote(nobind)
	rename total_cupos_convocar seats
	gen sobrecupo=sc_1s+sc_2s
	* formatting issue: entries that do not parse as numbers are all 2s
	gen beaslots=cond(mi(real(cupos_supernum_bea)), "2", cupos_supernum_bea)
	rename cod_carrera cod
	* The process year is the last four characters of proceos_adm
	gen proceso=ustrright(proceos_adm,4)
	keep cod proceso seats sobrecupo beaslots
	destring *, replace
	compress
	tempfile seats
	save "`seats'", replace
restore
merge 1:1 cod proceso using "`seats'", nogen keep(1 3)

* Reference tuition (ref) by 2012-equivalent program code and process year,
* merged onto the options and saved as the final options file.
preserve
	* Reference tuition
	use "$pathRawData/Aranceles/arancel_referencia.dta", clear
	keep if TipodeInstitución == "Universidades CRUCH" | inlist(nombre_institucion, ///
		"UNIVERSIDAD MAYOR", ///
		"UNIVERSIDAD NACIONAL ANDRES BELLO", ///
		"UNIVERSIDAD ADOLFO IBAÑEZ", ///
		"UNIVERSIDAD FINIS TERRAE", ///
		"UNIVERSIDAD DE LOS ANDES", ///
		"UNIVERSIDAD DIEGO PORTALES", ///
		"UNIVERSIDAD DEL DESARROLLO", ///
		"UNIVERSIDAD ALBERTO HURTADO")

	* Translate codigo_unico into the 2012 DEMRE code through the 2011 crosswalk
	rename codigo_unico codigo_unico_2011
	merge 1:1 codigo_unico_2011 using "$pathIntermediateData/crosswalk_2011_2012.dta", nogen
	drop if mi(codigo_demre_2012)
	rename codigo_demre_2012 demrecode
	keep demrecode ref20*

	* One row per code (largest value per year), then one row per code and year
	collapse (max) ref2010 ref2011 ref2012, by(demrecode)
	reshape long ref, i(demrecode) j(proceso)
	compress
	save "$pathIntermediateData/arancel_ref.dta", replace
restore

merge m:1 demrecode proceso using "$pathIntermediateData/arancel_ref.dta", keep(1 3) nogen
* Options without a reference tuition take the median of their FLcode_app and year
bys proceso FLcode_app: egen auxref=median(ref)
replace ref= auxref if mi(ref)
drop auxref

save "$pathCleanData/options_mrun", replace

*Proxy OOP
* Out-of-pocket proxy per enrolled student: tuition plus enrollment fee, minus
* the scholarship amount and, for holders of the reference-tuition
* scholarship (reference_tuition==1), minus the reference tuition; floored at 0.
use "$pathCleanData/enrollgrad_mrun", clear
merge 1:1 rut proceso using "$pathCleanData/students_mrun", keep(3) nogen keepus(aid_amount reference_tuition)
merge m:1 cod proceso using "$pathCleanData/options_mrun", keep(1 3) nogen keepus(valor_arancel valor_matricula arancel_referencial_proxy ref)

* Only deduct ref where it applies. In Stata 0*. is missing and max() ignores
* missing arguments, so writing reference_tuition*ref inside max(0, ...) would
* set the proxy to 0 for every row with a missing ref.
tempvar ref_deduction
gen `ref_deduction'=cond(reference_tuition==1, ref, 0)
gen oop_proxy=max(0,valor_arancel+valor_matricula-aid_amount-`ref_deduction')

* The proxy is undefined when any required component is missing
replace oop_proxy=. if mi(valor_arancel) | mi(valor_matricula) | mi(`ref_deduction')
drop `ref_deduction'

compress
save "$pathCleanData/enrollgrad_mrun", replace


*** Export to csv:
* Each clean file is sorted on its key and written as a delimited text file.
* The i-th entry of each list below describes the same export.
local exp_data apps_mrun enrollgrad_mrun options_mrun students_mrun
local exp_csv  apps_mrun enrollgrad_mrun options      students_mrun
forvalues i=1/4 {
	local dta_name : word `i' of `exp_data'
	local csv_name : word `i' of `exp_csv'

	* Sort key: applications by preference, options by program, the rest by student
	if "`dta_name'"=="apps_mrun" {
		local sort_key proceso rut pref
	}
	else if "`dta_name'"=="options_mrun" {
		local sort_key proceso cod
	}
	else {
		local sort_key proceso rut
	}

	use "$pathCleanData/`dta_name'.dta", clear
	sort `sort_key'
	export delimited using "$pathCleanData/`csv_name'.csv", replace
}



** Tables 1 and 2:

*Table 1
* Student panel: means and standard deviations of test-taker characteristics
* by process year, written to desc2.tex (the later panels append to it).
use "$pathIntermediateData/apps_aid_fenrollment_enrollment_grads_mrun", clear

drop fmulti_enroll
rename g_mrun mrun
drop g_*
duplicates drop
* admit is only defined for rows with an application
replace admit=. if no_app
* Drop rows where either the math or the language score equals zero
drop if s_math*s_lang==0

label var s_year "Year"
label var s_male "Male"
label var s_gpa "GPA"
label var s_lang "Verbal Score"
label var s_math "Math Score"
label var s_private "Private HS"
gen metro_area=inlist(s_codigo_region,5,8,13)
label var metro_area "Metro Area"
* Missing values compare as larger than any number in Stata, so without the
* guard a row with a missing score would be flagged as eligible.
gen eligible = (s_lang+s_math)/2>=450 if !mi(s_lang) & !mi(s_math)
label var eligible "App. Eligible"
gen app=!no_app
label var app "Platform App."
label var admit "Platform Adm."
gen admitfirst=(a_pref==1 & admit==1) if !no_app
label var admitfirst "1st Pref. Adm."
* Platform enrollment is only defined for admitted rows, and dropout only
* for rows enrolled through the platform
replace enrolled_platform=. if admit!=1
label var enrolled_platform "Platform Enr."
replace dropout=. if enrolled_platform!=1
label var dropout "Platform Drop."


forvalues y=2010/2012 {
	estpost sum s_male s_private metro_area s_gpa s_math s_lang app if s_year==`y'
	est store s`y'
}

*Student Panel
esttab s2010 s2011 s2012 using desc2.tex, replace ///
mtitles("\textbf{\emph{Year 2010}}" "\textbf{\emph{Year 2011}}" "\textbf{\emph{Year 2012}}") ///
refcat(s_male "\textbf{\emph{Test Takers}}", nolabel) ///
collabels(\multicolumn{1}{c}{{Mean}} \multicolumn{1}{c}{{Std.Dev.}}) ///
cells("mean(fmt(2)) sd(fmt(2))") label nonumber f alignment(S) booktabs

*Apps Panel
* Characteristics of each applicant's top-ranked choice, by process year,
* restricted to top choices with Univ below 38; appended to desc2.tex.
use "$pathCleanData/apps_mrun.dta", clear

* l3 / l7: the applicant's list has a 3rd / 7th preference
gen listed7=(pref==7)
gen listed3=(pref==3)
egen l7=max(listed7), by(rut proceso)
egen l3=max(listed3), by(rut proceso)
keep if pref==1

*10 (Medical Sciences), 11 (Engineering)
gen first_medicalsci=(Area==10 & Univ<38)
gen first_engineering=(Area==11 & Univ<38)

merge 1:1 rut proceso using "$pathCleanData/students_mrun", keepusing(current_cohort) keep(1 3) nogen

label var first_medicalsci "Top choice in G25 Medical Sciences"
label var first_engineering "Top choice in G25 Engineering and Technology"
label var l7 "Ranked at least 7 programs"
label var l3 "Ranked at least 3 programs"
label var punt "Score at top-ranked program"
label var current_cohort "Enrolled in HS while applying"

foreach yr of numlist 2010/2012 {
	estpost sum punt first_engineering first_medicalsci l3 l7 current_cohort if proceso==`yr' & Univ<38
	est store a`yr'
}

esttab a2010 a2011 a2012 using desc2.tex, append ///
refcat(punt "\textbf{\emph{G25 Applicants' Top Choice}}", nolabel) ///
cells("mean(fmt(2)) sd(fmt(2))") label nonumber f alignment(S) booktabs ///
nomtitles collabels(none)


*Enrollment Panel
* Where students admitted to a program with a_univ below 38 end up enrolling,
* by process year; appended to desc2.tex.
use "$pathIntermediateData/apps_aid_fenrollment_enrollment_grads_mrun", clear
rename g_mrun mrun
drop s_* g_*
duplicates drop
keep if a_estado_preferencia==24

gen gotinto25=(a_univ<38)

* Enrollment destination among students found in the first-year enrollment file
gen enroll_other=(no_enroll==0 & fe_g8==0 & fe_g25==0)
gen enroll_g8=(no_enroll==0 & fe_g8==1 & fe_g25==0)
gen enroll_g25=(no_enroll==0 & fe_g8==0 & fe_g25==1)
label var enroll_g25 "G25 Enrollee"
label var enroll_g8 "G8 Enrollee"
label var no_enroll "Other/Unenrolled"

foreach yr of numlist 2010/2012 {
	estpost sum enroll_g25 enroll_g8 no_enroll if gotinto25 & a_year==`yr'
	est store e`yr'
}

esttab e2010 e2011 e2012 using desc2.tex, append ///
refcat(enroll_g25 "\textbf{\emph{G25 Admits}}", nolabel) ///
cells("mean(fmt(2)) sd(fmt(2))") label nonumber f alignment(S) booktabs ///
nomtitles collabels(none)


eststo clear


*Table 2
* Build the student "type" file: one row per student-year with the
* gender x school-sector cells and rescaled GPA and test scores.
clear
forvalues yr=2009/2015 {
	append using "$pathIntermediateData/students_mrun_`yr'"
}
compress

* Four mutually exclusive cells from the male and private indicators.
generate type1 = s_male*s_private
generate type2 = s_male*(1-s_private)
generate type3 = (1-s_male)*s_private
generate type4 = (1-s_male)*(1-s_private)

* Rescaled covariates (labelled as SD units further below in this script).
generate gpa = s_gpa / 100
generate lang = s_lang / 110
generate math = s_math / 110

* Remove the known problematic 2015 record.
drop if s_mrun==16667933 & s_rbd==0 & s_year==2015

rename s_mrun mrun
rename s_year year
rename s_current_cohort current
keep mrun year type* gpa lang math current
compress
save "$pathIntermediateData/type", replace


* Admission regression: students linked to their applications, one row per
* student-year, stacked over 2009-2015.
clear
forvalues yr=2009/2015 {
	*** Students <-> Apps
	use "$pathIntermediateData/students_mrun_`yr'", clear
	* Only the 2015 file contains the problematic record (demre messed up).
	drop if `yr'==2015 & s_mrun==16667933 & s_rbd==0
	rename s_mrun a_mrun
	merge 1:m a_mrun using "$pathIntermediateData/apps_mrun_`yr'", keep(1 3)
	generate applied = _merge==3
	drop _merge
	generate admit = a_estado_preferencia==24
	* Put any admitted application first within student, then keep that row.
	* warning: keeping only one line per app
	gsort a_mrun -admit
	bysort a_mrun: generate aux = _n
	keep if aux==1
	drop aux
	tempfile studapp`yr'
	save "`studapp`yr''"
}

* Stack the yearly files.
clear
forvalues yr=2009/2015 {
	append using "`studapp`yr''"
}
compress

* Attach the student type and rescaled covariates.
rename s_year year
rename a_mrun mrun
merge 1:1 year mrun using "$pathIntermediateData/type", keep(1 3)

* Year x type interactions (base year 2011) among applicants after 2009,
* robust standard errors.
regress admit b2011.year#c.type* type* gpa math lang if applied & year>2009, r
estimates store adm3

* Enrollment regression: applications linked to platform enrollment,
* one row per applicant-year, stacked over 2009-2015.
clear
forvalues y=2009/2015 {
	*** Apps <-> Platform Enrollment
	use "$pathIntermediateData/apps_mrun_`y'", clear
	gen admit = a_estado_preferencia==24
	* Put any admitted application first within applicant, then keep that row.
	gsort a_mrun -admit
	bys a_mrun: gen aux=_n
	keep if aux==1 //warning: keeping only one line per app
	drop aux
	rename a_mrun pe_mrun
	merge 1:1 pe_mrun using "$pathIntermediateData/penrollment_mrun_`y'", keep(1 3)
	* Enrolled through the platform = found in the platform enrollment file.
	gen enrolled_platform=(_merge==3)
	drop _merge	
	tempfile appenroll`y'
	save "`appenroll`y''"
}
* Stack the yearly files.
clear
forvalues y=2009/2015 {
	append using "`appenroll`y''"
}
compress

* Attach the student type and rescaled covariates.
rename (a_year pe_mrun) (year mrun)
merge 1:1 year mrun using "$pathIntermediateData/type", keep(1 3)

gen admitg25=admit*a_g25
* Compare to 1 explicitly: a bare "if admitg25" would also select rows where
* admitg25 is missing (missing a_g25), since Stata treats missing as true.
reg enrolled_platform b2011.year#c.type* type* gpa math lang if admitg25==1 & year>2009, r
est sto enr3

* Dropout regression: platform enrollees linked to the enrollment records
* of the following year, stacked over 2009-2015.
clear
forvalues y=2009/2015 {
	*** Platform Enrollment <-> t+1 Enrollment
	use "$pathIntermediateData/penrollment_mrun_`y'", clear
	rename pe_mrun e_mrun
	local t=`y'+1
	* Keep platform enrollees only; records found only in year t+1 are discarded.
	merge 1:m e_mrun using "$pathIntermediateData/enrollment_mrun_`t'", keep(1 3)
	* dropout = 1 when the student has no enrollment record in year t+1.
	* Built as an explicit 0/1 byte so it does not carry the _merge value label.
	gen byte dropout=(_merge==1)
	drop _merge
	tempfile enrolldropout`y'
	save "`enrolldropout`y''"
}
* Stack the yearly files.
clear
forvalues y=2009/2015 {
	append using "`enrolldropout`y''"
}
compress

* Attach the student type and rescaled covariates (several rows per student
* are possible because of the 1:m merge above).
rename (pe_year e_mrun) (year mrun)
merge m:1 year mrun using "$pathIntermediateData/type", keep(1 3)

* Compare to 1 explicitly: a bare "if pe_g25" would also select rows where
* pe_g25 is missing, since Stata treats missing as true.
reg dropout b2011.year#c.type* type* gpa math lang if pe_g25==1 & year>2009, r
est sto dro3

* Graduation regression: platform enrollees linked to graduation records,
* stacked over 2009-2015.
clear
forvalues y=2009/2015 {
	*** Platform Enrollment <-> Graduation
	use "$pathIntermediateData/penrollment_mrun_`y'", clear
	rename pe_mrun g_mrun
	merge 1:m g_mrun using "$pathIntermediateData/grads_mrun", keep(1 3)
	gen graduated=(_merge==3)
	drop _merge	
	* Graduated within 5, 6 or 7 years of the platform enrollment year.
	* inrange() is 0 when g_year is missing, so non-graduates are coded 0.
	gen grad5=inrange(g_year-pe_year,0,5) & graduated
	gen grad6=inrange(g_year-pe_year,0,6) & graduated
	gen grad7=inrange(g_year-pe_year,0,7) & graduated
	tempfile enrollgrad`y'
	save "`enrollgrad`y''"
}
* Stack the yearly files.
clear
forvalues y=2009/2015 {
	append using "`enrollgrad`y''"
}
compress

* Attach the student type and rescaled covariates.
rename (pe_year g_mrun) (year mrun)
merge m:1 year mrun using "$pathIntermediateData/type", keep(1 3)

* Compare to 1 explicitly: a bare "if pe_g25" would also select rows where
* pe_g25 is missing, since Stata treats missing as true.
* The 6-year regression is only displayed; the 7-year one is stored as gra3.
reg grad6 b2011.year#c.type* type* gpa math lang if pe_g25==1 & year<=2013 & year>2009, r
reg grad7 b2011.year#c.type* type* gpa math lang if pe_g25==1 & year<=2012 & year>2009, r
est sto gra3

* Variable labels picked up by esttab's label option.
label var year "Year"
label var type1 "Male\$\times\$Private"
label var type2 "Male\$\times\$Public"
label var type3 "Female\$\times\$Private"
label var type4 "Female\$\times\$Public"
label var gpa "GPA (SDs)"
label var lang "Verbal Score (SDs)"
label var math "Math Score (SDs)"

* Regression table. Models are listed explicitly rather than with the *3
* wildcard because mtitle() is positional: the column order must not depend
* on how the wildcard expands or on other stored results ending in "3".
esttab adm3 enr3 dro3 gra3 using regs3.tex, wide noomitted ///
	mtitle("Admission" "Enrollment" "Dropout" "Graduation") ///
	nonumbers eqlabels(none) collabels(none) booktabs replace ///
	cells((b(star fmt(%9.3f))) (se(fmt(%9.3f)par))) ///
	label nobaselevels interaction("\$\times\$") substitute("=" " ") ///
	se star(* 0.10 ** 0.05 *** 0.01) ///
	postfoot("\hline\hline" "\end{tabular}" "}" "\\ \footnotetext{Note: This table shows estimates of the average difference in each outcome, for each type of student, and for each year after 2009. The base year is 2011 and the base type is Female-Public. Admission refers to the probability of being assigned a seat in the platform; Enrollment refers to the probability of enrolling in a platform program conditional on being admitted in a G25 option; Dropout refers to the probability of not being enrolled in any option the year after enrolling in a G25 program; and Graduation refers to the probability of graduating within 7 years of enrolling in a G25 program. The estimating equation includes student covariates (GPA and test scores) and student-type fixed effects. These estimated coefficients are not reported in the table. The results on graduation rates are constrained to years before 2013 because we do not have data after 2019. Robust standard errors in parentheses. * $ p<0.10 $, ** $ p<0.05 $, *** $ p<0.01 $ }") ///
	drop(type* gpa lang math)

